package com.kdtech.analyse.blog;

import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.Date;
import java.util.regex.Pattern;

import org.apache.commons.lang.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.kdtech.analyse.AnalyseNews;
import com.kdtech.crawler.CrawlHTML;
import com.kdtech.entity.crawler.UrlMeta;
import com.kdtech.entity.data.NewsMeta;
import com.kdtech.utils.DateUtils;
import com.kdtech.utils.HtmlCleaner;

/**
 * Analyser for QQ blog article pages of the form
 * {@code http://blog.qq.com/qzone/<userId>/<postId>.htm}.
 *
 * <p>{@link #parserHtml(UrlMeta)} extracts title, author, publication date and
 * body from the page HTML; {@link #Update(NewsMeta)} fetches the comment and
 * click counters from the statistics endpoint stored in the meta's update URL.
 */
public class QQBlogAnalyse implements AnalyseNews{

	/** Full-match pattern for article URLs, e.g. http://blog.qq.com/qzone/22465225/1354461553.htm */
	private static final Pattern DETAIL_PAGE_PATTERN =
			Pattern.compile("http://blog\\.qq\\.com/qzone/[0-9]+/[0-9]+\\.htm");

	/** Key that precedes the publication timestamp embedded in the page source. */
	private static final String PUBTIME_KEY = "\"pubtime\":";

	/** Path segment that precedes {@code <userId>/<postId>} in an article URL. */
	private static final String QZONE_MARKER = "/qzone/";

	/** Statistics endpoint prefix; the unikey is the URL-encoded user.qzone.qq.com article address. */
	private static final String STATS_URL_PREFIX =
			"http://r.qzone.qq.com/cgi-bin/user/qz_opcnt2?unikey=http%3A%2F%2Fuser.qzone.qq.com%2F";

	/** Marker that opens the JSONP wrapper around the statistics response. */
	private static final String JSONP_OPEN = "_Callback(";

	/**
	 * Tells whether the given URL is a blog article page.
	 *
	 * @param url the URL to test; may be {@code null}
	 * @return {@code true} if the whole URL matches the article pattern,
	 *         {@code false} otherwise (including for {@code null})
	 */
	public boolean isDetailPage(String url) {
		return url != null && DETAIL_PAGE_PATTERN.matcher(url).matches();
	}

	/**
	 * Parses an already-downloaded article page into a {@link NewsMeta}.
	 *
	 * <p>The URL is not validated against {@link #isDetailPage(String)} here;
	 * callers are expected to filter first (as {@link #main(String[])} does).
	 * When a statistics URL can be derived from the page URL it is stored as the
	 * update URL and queried once for the click/comment counters; otherwise the
	 * page URL itself is stored as the update URL.
	 *
	 * @param urlMeta the crawled page (URL plus HTML)
	 * @return the extracted article data, or {@code null} when {@code urlMeta},
	 *         its URL or its HTML is {@code null}
	 */
	public NewsMeta parserHtml(UrlMeta urlMeta) {
		if (urlMeta == null) {
			return null;
		}
		String url = urlMeta.getUrl();
		String html = urlMeta.getHtml();
		if (url == null || html == null) {
			// Nothing to parse; Jsoup.parse would fail on a null document.
			return null;
		}

		NewsMeta blog = new NewsMeta();
		blog.setUrl(url);

		Document doc = Jsoup.parse(html);

		// Prefer the epoch timestamp embedded in the page source, then fall back to visible date text.
		Long date = parsePubtime(html);
		if (date == null) {
			date = DateUtils.matchDate(doc.select("div.in_auther").text());
		}
		if (date == null) {
			date = DateUtils.matchDate(doc.select("span#publishDate").text());
		}

		String content = HtmlCleaner.getContentHtml(url, doc.select("div#blogDetailDiv"));
		if (StringUtils.isBlank(content)) {
			content = HtmlCleaner.getContentHtml(url, doc.select("div#blogContainer"));
		}

		String statsUrl = buildStatsUrl(url);
		if (statsUrl != null) {
			blog.setUpdateUrl(statsUrl);
			NewsMeta update = Update(blog);
			if (update != null) {
				blog.setClickNum(update.getClickNum());
				blog.setCommentNum(update.getCommentNum());
			}
		} else {
			// No statistics URL could be derived; keep the page URL as the update target.
			blog.setUpdateUrl(url);
		}

		blog.setAuthor(doc.select("strong#spacename").text());
		blog.setTitle(extractTitle(doc));
		blog.setContent(content);
		blog.setDate(date);
		return blog;
	}

	/**
	 * Refreshes the comment and click counters of {@code meta} from its update URL.
	 *
	 * <p>The response is expected to be JSON, optionally wrapped in a
	 * {@code _Callback(...)} JSONP call, with the counters located at
	 * {@code data[0].current.newdata}: {@code "RZC"} is stored as the comment
	 * count and {@code "RZRD"} as the click count.
	 *
	 * @param meta the article whose counters should be refreshed; may be {@code null}
	 * @return the same {@code meta} instance with updated counters, or {@code null}
	 *         if {@code meta} or its update URL is {@code null}, the fetch yields no
	 *         body, or the response does not have the expected shape
	 */
	public NewsMeta Update(NewsMeta meta) {
		if (meta == null) {
			return null;
		}
		String updateUrl = meta.getUpdateUrl();
		if (updateUrl == null) {
			return null;
		}
		UrlMeta response = CrawlHTML.responseToURL(updateUrl);
		if (response == null) {
			return null;
		}
		String body = response.getHtml();
		if (body == null) {
			return null;
		}
		try {
			JSONObject json = JSONObject.parseObject(stripJsonpWrapper(body));
			if (json == null) {
				return null;
			}
			JSONArray data = json.getJSONArray("data");
			if (data == null || data.isEmpty()) {
				return null;
			}
			JSONObject counters = data.getJSONObject(0).getJSONObject("current").getJSONObject("newdata");
			meta.setCommentNum(counters.getIntValue("RZC"));
			meta.setClickNum(counters.getIntValue("RZRD"));
			return meta;
		} catch (RuntimeException ignored) {
			// Best effort: a malformed or differently shaped response just means "no counters available".
			return null;
		}
	}

	/** Returns the first non-blank title among the known page layouts (may be empty if none matches). */
	private static String extractTitle(Document doc) {
		String title = doc.select("div.blog_header div.blog_title div#title.blog_tit_cont strong span span.blog_tit_detail").text().trim();
		if (StringUtils.isBlank(title)) {
			title = doc.select("div.blog_cont div.blog_main div.blog_title h2#veryTitle span").text();
		}
		if (StringUtils.isBlank(title)) {
			title = doc.select("div#blogContentAnchor div.mode_table_mains div h4#veryTitle.mode_title").text();
		}
		return title;
	}

	/**
	 * Reads the value following {@code "pubtime":} up to the next comma and
	 * converts it from epoch seconds to epoch milliseconds.
	 *
	 * @return the timestamp in milliseconds, or {@code null} if absent or not numeric
	 */
	private static Long parsePubtime(String html) {
		int keyPos = html.indexOf(PUBTIME_KEY);
		if (keyPos == -1) {
			return null;
		}
		String tail = html.substring(keyPos + PUBTIME_KEY.length());
		int comma = tail.indexOf(',');
		if (comma == -1) {
			return null;
		}
		String raw = tail.substring(0, comma).trim();
		if (raw.isEmpty()) {
			return null;
		}
		try {
			return Long.parseLong(raw) * 1000L;
		} catch (NumberFormatException ignored) {
			// Not a plain number (e.g. quoted); the caller falls back to the visible date text.
			return null;
		}
	}

	/**
	 * Builds the statistics URL for an article URL of the form
	 * {@code .../qzone/<userId>/<postId>.htm}.
	 *
	 * @return the statistics URL, or {@code null} if the URL does not have that shape
	 */
	private static String buildStatsUrl(String url) {
		int htmPos = url.indexOf(".htm");
		if (htmPos == -1) {
			return null;
		}
		String head = url.substring(0, htmPos);
		int markerPos = head.indexOf(QZONE_MARKER);
		if (markerPos == -1) {
			return null;
		}
		String[] ids = head.substring(markerPos + QZONE_MARKER.length()).split("/");
		if (ids.length != 2) {
			return null;
		}
		return STATS_URL_PREFIX + ids[0] + "%2Fblog%2F" + ids[1];
	}

	/**
	 * Removes a {@code _Callback( ... )} JSONP wrapper, keeping only the payload
	 * between the opening marker and the last closing parenthesis. Text without
	 * the wrapper is returned unchanged.
	 */
	private static String stripJsonpWrapper(String text) {
		int open = text.indexOf(JSONP_OPEN);
		int close = text.lastIndexOf(')');
		if (open != -1 && close > open) {
			return text.substring(open + JSONP_OPEN.length(), close);
		}
		return text;
	}

	/**
	 * Manual smoke test: downloads one sample article and prints the parsed result.
	 * Requires network access.
	 */
	public static void main(String[] args) throws UnsupportedEncodingException {
		System.out.println(URLEncoder.encode("张艺谋", "GBK"));
		// Example search URL: http://www.soso.com/q?w=%D4%C1%B3%DB&cid=qs.blog.se&site=blog.qq.com&idx=f
		String url = "http://blog.qq.com/qzone/363109052/1263291637.htm";
		QQBlogAnalyse test = new QQBlogAnalyse();
		if(test.isDetailPage(url)){
			UrlMeta responseToURL = CrawlHTML.responseToURL(url);
			NewsMeta parserHtml = test.parserHtml(responseToURL);
			System.out.println(parserHtml);
		}else{
			System.out.println("不符合规则");
		}
	}
	
}
